> # Load the scraped co-occurrence counts. The file is read without a
> # header row, so the three columns are named here: two names and a
> # hit count.
> dprk <- read.csv("northkorea2012.csv", header = FALSE)
> names(dprk) <- c("name1", "name2", "hits")
> # Remove literal double-quote characters from both name columns.
> dprk$name1 <- gsub("\"", "", dprk$name1)
> dprk$name2 <- gsub("\"", "", dprk$name2)
> # Build a mirrored copy (name1 and name2 swapped) so that each pair is
> # present in both orientations once the two copies are stacked.
> # NOTE(review): 820 is hard-coded; presumably nrow(dprk) -- confirm.
> dprk.b <- dprk[1:820, c("name2", "name1", "hits")]
> names(dprk.b) <- c("name1", "name2", "hits")
> dprk2 <- data.frame(rbind(dprk, dprk.b))
> # Sort by name2 first and then by name1. Later steps read dprk2 by row
> # position, so the resulting row order matters.
> dprk2 <- dprk2 %>% arrange(name2)
> dprk2 <- dprk2 %>% arrange(name1)
> # Reshape the long pair list to wide form: one row per name1 and one
> # hits column per distinct name2.
> nwk.s.df <- reshape(dprk2, v.names = "hits", timevar = "name2", 
+ direction = "wide", idvar = "name1")
> # Keep every row and drop the first column, leaving the hit counts as a
> # matrix (negative indexing avoids the 1:n / 2:n sequence pitfalls).
> nwk.s <- as.matrix(nwk.s.df[, -1])
> # Undirected valued network built from the adjacency matrix; cell values
> # are stored on the edges as the "hits" attribute.
> nwk <- network(nwk.s, directed = FALSE, matrix.type = "a", ignore.eval = FALSE, 
+ names.eval = "hits")
> dprk3 <- as.matrix(nwk, attrname = "hits", matrix.type = "edgelist")
> # Copy the network, delete every edge from nwk1, then re-add edges for
> # the vertex pairs listed in dprk3 with values taken from the third
> # column of dprk2.
> # NOTE(review): this relies on the rows of dprk3 and dprk2 lining up --
> # confirm before changing any sort order upstream.
> nwk1 <- nwk
> delete.edges(nwk1, seq_along(nwk1$mel))
> nwk1[dprk3[, 1:2], names.eval = "hits", add.edges = TRUE] <- dprk2[, 
+ 3]
> # Grey level per cell: log(hits + 1) divided by its maximum and
> # inverted, so the largest count maps to black and zero to white.
> hits.adj <- as.matrix(nwk1, attrname = "hits")
> k <- max(log(hits.adj + 1))
> nwk.col <- matrix(gray(1 - (log(hits.adj + 1)/k)), nrow = network.size(nwk1))
> dprk.edge <- as.matrix(nwk1, attrname = "hits", matrix.type = "edgelist")
> # Vertex-level centrality table for nwk1, one row per name1 value.
> # Edge values are taken into account (ignore.eval = FALSE).
> cent <- data.frame(names = nwk.s.df[["name1"]])
> cent[["evcent"]] <- evcent(nwk1, ignore.eval = FALSE)
> cent[["between"]] <- betweenness(nwk1, ignore.eval = FALSE)
> cent[["degrees"]] <- degree(nwk1, ignore.eval = FALSE)
> # Load the Ishiyama file: the first column supplies the row labels and
> # columns 2-54 are converted to a numeric matrix.
> # NOTE(review): presumably rows are people and columns are events --
> # confirm against the data file.
> ishiyama <- read.csv("ishiyama2014-kju2012.csv", header = TRUE)
> ish <- data.matrix(ishiyama[, 2:54])
> row.names(ish) <- ishiyama[, 1]
> # Row-by-row cross-products: cell [i, j] is the sum over all columns of
> # row i times row j.
> ish.sm <- ish %*% t(ish)
> # Directed valued network over the rows, with the cross-product stored
> # as the "hits" edge attribute, plus its edge list.
> nwk.ish <- network(ish.sm, directed = TRUE, matrix.type = "a", 
+ ignore.eval = FALSE, names.eval = "hits")
> dprk.edge.ish <- as.matrix(nwk.ish, attrname = "hits", matrix.type = "edgelist")
> # Vertex-level centrality table for the Ishiyama network.
> cent.ish <- data.frame(names = rownames(ish.sm))
> cent.ish$evcent <- evcent(nwk.ish, ignore.eval = FALSE)
> cent.ish$between <- betweenness(nwk.ish, ignore.eval = FALSE)
> cent.ish$degrees <- degree(nwk.ish, ignore.eval = FALSE)
> # Pairwise comparison table: scraped hit counts (gchits) against the
> # Ishiyama edge values (visits).
> scraped.data <- dprk
> names(scraped.data) <- c("name1", "name2", "gchits")
> # Turn the Ishiyama edge list (vertex indices) into a table of vertex
> # names by joining each index column against the "vnames" attribute.
> ish.data <- as.data.frame(dprk.edge.ish)
> ish.names <- as.data.frame(attr(dprk.edge.ish, "vnames"))
> ish.names$order <- rownames(ish.names)
> ish.data <- merge(ish.data, ish.names, by.x = "V1", by.y = "order")
> ish.data <- merge(ish.data, ish.names, by.x = "V2", by.y = "order")
> # Keep columns 4, 5 and 3 (the two joined name columns and the edge
> # value) and rename them.
> ish.data <- ish.data[, c(4, 5, 3)]
> names(ish.data) <- c("name1", "name2", "visits")
> # Full outer joins. Each source is joined twice, once as-is and once
> # with name1/name2 swapped, so a pair is matched in either orientation.
> combined <- merge(x = scraped.data, y = ish.data, by = c("name1", 
+ "name2"), all.x = TRUE, all.y = TRUE)
> combined <- merge(x = combined, y = ish.data, by.x = c("name1", 
+ "name2"), by.y = c("name2", "name1"), all.x = TRUE, all.y = TRUE)
> combined <- merge(x = combined, y = scraped.data, by.x = c("name1", 
+ "name2"), by.y = c("name2", "name1"), all.x = TRUE, all.y = TRUE)
> # Coalesce the duplicated .x/.y columns, preferring .x when present.
> combined$gchits <- ifelse(is.na(combined$gchits.x), combined$gchits.y, 
+ combined$gchits.x)
> combined$visits <- ifelse(is.na(combined$visits.x), combined$visits.y, 
+ combined$visits.x)
> # Pairs absent from a source count as zero.
> combined$gchits <- ifelse(is.na(combined$gchits), 0, combined$gchits)
> combined$visits <- ifelse(is.na(combined$visits), 0, combined$visits)
> # Drop self-pairs.
> dat <- combined[combined$name1 != combined$name2, ]
> # Appendix Figure 1: jittered scatter of log(gchits + 1) against
> # log(visits + 1) with a linear fit and a 99% confidence band, saved as
> # an 8.5 x 7 inch PDF.
> ggplot(dat, mapping = aes(x = jitter(log(visits + 1), factor = 3), 
+ y = jitter(log(gchits + 1), factor = 10))) + 
+ geom_point(alpha = 0.5, colour = "lightgray") + 
+ stat_smooth(method = "lm", level = 0.99) + 
+ labs(x = "\n Logged co-occurrences (Ishiyama)", 
+ y = "Logged co-occurrences (Scraped) \n ") + 
+ theme_bw()
> ggsave(filename = "AppendixFigure1.pdf", width = 8.5, height = 7, 
+ units = "in")
> # Standard error of the mean: sample standard deviation of x divided by
> # the square root of the number of elements in x.
> std <- function(x) {
+   n.obs <- length(x)
+   sd(x) / sqrt(n.obs)
+ }
> # Per-visits summary of gchits: the median (med) and 1.96 times the
> # standard error of the mean as computed by std() (std).
> means <- dat %>% group_by(visits) %>% 
+ summarize(med = median(gchits), std = 1.96 * std(gchits))
> # Appendix Figure 2: jittered raw counts for pairs with visits below 9,
> # with one crossbar per visits value spanning med +/- std. The visible
> # y range is limited to 0-20. Saved as an 8.5 x 7 inch PDF.
> ggplot(data = dat[dat$visits < 9, ]) + 
+ geom_point(aes(x = jitter(visits), y = jitter(gchits)), alpha = 0.5, 
+ colour = "lightgray") + 
+ geom_crossbar(data = means[means$visits < 9, ], aes(x = visits, 
+ y = med, ymin = med - std, ymax = med + std, group = factor(visits)), 
+ width = 0.25) + 
+ labs(x = "Co-occurrences (Ishiyama)", y = "Co-occurrences (Scraped) \n ") + 
+ coord_cartesian(ylim = c(0, 20)) + 
+ theme_bw()
> ggsave(filename = "AppendixFigure2.pdf", width = 8.5, height = 7, 
+ units = "in")
> # Print a section header; the trailing newline keeps it on its own line
> # instead of running into the next printed result.
> cat("### --- Statistical summaries reported in Appendix page 1 --- ###\n")
> # Pearson correlation between the two co-occurrence measures.
> with(dat, cor(visits, gchits))
### --- Statistical summaries reported in Appendix page 1 --- ###
[1] 0.5830678
> # OLS of log(gchits + 1) on log(visits + 1) over the rows of dat.
> summary(lm(formula = log(gchits + 1) ~ log(visits + 1), data = dat))

Call:
lm(formula = log(gchits + 1) ~ log(visits + 1), data = dat)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.20242 -0.54215 -0.08757  0.55647  2.34823 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)      0.54215    0.02876   18.85   <2e-16 ***
log(visits + 1)  0.92661    0.02674   34.65   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.7804 on 1638 degrees of freedom
Multiple R-squared:  0.423,	Adjusted R-squared:  0.4227 
F-statistic:  1201 on 1 and 1638 DF,  p-value: < 2.2e-16

> # OLS of gchits on visits (untransformed counts) over the rows of dat.
> summary(lm(formula = gchits ~ visits, data = dat))

Call:
lm(formula = gchits ~ visits, data = dat)

Residuals:
    Min      1Q  Median      3Q     Max 
-19.743  -2.564  -1.730   1.436  37.429 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.72977    0.20051   8.627   <2e-16 ***
visits       1.83445    0.06316  29.046   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.456 on 1638 degrees of freedom
Multiple R-squared:   0.34,	Adjusted R-squared:  0.3396 
F-statistic: 843.7 on 1 and 1638 DF,  p-value: < 2.2e-16

